On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana. Le CO2, au cours des études préliminaires, s’est montré peu influent en conditions contrôles de fer et de nitrates, et accentué en cas de stress nutritionnel. Nous reprennons ces résultats avec des fonctions génériques et propres pour en faire le résumé et de jolis graphes.
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## number of items read is not a multiple of the number of columns
On a, pour chaque gène et chaque condition, son niveau d’expression en sortie de quantification. On labelle les conditions avec le code suivant : lettre majuscule pour le niveau fort, minuscule pour le niveau faible. Le réplicat est donné après l’underscore.
# quantification file
data <- read.csv("quantifFiles/QuantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
[1] "At_AmbientCO2_LowNitrate_Fe"
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
head(data) cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2 CnF_1 CnF_3 cNf_1
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325
AT1G01020 416 285 289 349 364 370 226 513 502 561 461
AT1G01030 31 15 19 29 36 28 12 47 34 47 18
cnf_2 cnf_1 cNf_2 cNf_3 cnf_3 Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2
AT1G01010 2113 2193 2564 2364 2074 1987 2027 1754 1697 1537 1898
AT1G01020 407 432 614 380 502 484 467 426 415 413 462
AT1G01030 40 32 44 37 27 42 39 36 40 37 37
CNF_3 CNF_2
AT1G01010 816 912
AT1G01020 223 312
AT1G01030 15 19
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 23342 24
On définit les conditions contrôle comme suit : CO2 ambiant et fort fer.
g = list()
method = "edger"
labels <- c("cNF", "cNf")
genes1 <- dualDE(data, labels, pval = 0.01, method = method) (Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "cNf_1" "cNf_2" "cNf_3"
cNF_3 cNF_2 cNF_1 cNf_1 cNf_2 cNf_3
0.9844303 0.9894266 0.9466501 1.0201203 1.0475235 1.0118491
[1] "7341 genes DE"
(Intercept) groupCNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "CNf_1" "CNf_3" "CNf_2"
CNF_1 CNF_3 CNF_2 CNf_1 CNf_3 CNf_2
0.9778008 0.9263592 0.9875116 1.0165429 1.0509593 1.0408262
[1] "7531 genes DE"
(Intercept) groupcnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cnF_2" "cnF_1" "cnF_3" "cnf_2" "cnf_1" "cnf_3"
cnF_2 cnF_1 cnF_3 cnf_2 cnf_1 cnf_3
0.9904890 1.0044366 0.9970741 0.9941176 1.0125589 1.0013239
[1] "10410 genes DE"
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CnF_2" "CnF_1" "CnF_3" "Cnf_3" "Cnf_1" "Cnf_2"
CnF_2 CnF_1 CnF_3 Cnf_3 Cnf_1 Cnf_2
1.0422944 1.0193300 1.0235403 0.9418015 0.9881213 0.9849125
[1] "9520 genes DE"
On visualise les gènes différentiellement exprimés en commun entre les différents niveaux des autres facteurs.
library(ggVennDiagram)
library(VennDiagram)
gene_list <- list()
for (comp in names(g)) {
print(g[[comp]])
gene_list[[comp]] <- g[[comp]]$gene_id
} gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT1G13609 7.022011 11.082911 0.000000e+00 0.000000e+00 1 1
2 AT5G46900 7.660301 -7.607017 1.409723e-238 1.645287e-234 2 1
3 AT4G16370 11.521065 4.656174 2.715051e-238 2.112491e-234 3 1
4 AT1G52120 7.149774 6.073429 9.140222e-228 5.333777e-224 4 1
5 AT2G14247 7.965909 7.154585 8.076178e-223 3.770283e-219 5 1
6 AT1G17180 9.627023 5.335590 3.027245e-222 1.177699e-218 6 1
7 AT1G56430 9.400294 4.597719 6.213489e-216 2.071932e-212 7 1
8 AT5G46890 8.210122 -6.222259 1.706847e-210 4.980152e-207 8 1
9 AT3G03660 9.155803 4.515112 1.410258e-208 3.657583e-205 9 1
upreg
1 1
2 0
3 1
4 1
5 1
6 1
7 1
8 0
9 1
[ reached 'max' / getOption("max.print") -- omitted 7332 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT5G46900 -2.678878 -13.407381 0.000000e+00 0.000000e+00 1 1
2 AT5G46890 6.968087 -8.694677 1.492985e-312 1.742463e-308 2 1
3 AT5G44130 7.112045 -7.240253 6.527318e-281 5.078689e-277 3 1
4 AT1G13609 5.865935 12.253662 4.622108e-280 2.697231e-276 4 1
5 AT4G12520 7.455945 -7.046251 9.796516e-252 4.573406e-248 5 1
6 AT2G14247 7.842448 6.150042 1.889832e-246 7.352077e-243 6 1
7 AT4G33790 7.405395 -5.522115 1.501468e-219 5.006754e-216 7 1
8 AT4G15290 8.210109 -4.836956 6.657053e-219 1.942362e-215 8 1
9 AT1G56430 9.131468 5.086270 4.343399e-214 1.126485e-210 9 1
upreg
1 0
2 0
3 0
4 1
5 0
6 1
7 0
8 0
9 1
[ reached 'max' / getOption("max.print") -- omitted 7522 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG upreg
1 AT1G19900 8.649998 -5.822602 0 0 13 1 0
2 AT1G53830 8.899211 -4.863803 0 0 13 1 0
3 AT1G54970 8.404367 -5.667750 0 0 13 1 0
4 AT1G69880 7.979638 5.814494 0 0 13 1 1
5 AT2G30670 9.987564 6.434368 0 0 13 1 1
6 AT2G39040 7.134149 -7.226335 0 0 13 1 0
7 AT3G08860 9.230476 6.747882 0 0 13 1 1
8 AT3G44300 9.256365 6.105301 0 0 13 1 1
9 AT3G62680 9.700786 -5.044145 0 0 13 1 0
[ reached 'max' / getOption("max.print") -- omitted 10401 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT1G13609 4.937553 12.969206 0.000000e+00 0.000000e+00 4.5 1
2 AT2G29460 8.151641 5.503782 0.000000e+00 0.000000e+00 4.5 1
3 AT2G43510 5.518930 10.224305 0.000000e+00 0.000000e+00 4.5 1
4 AT3G44300 9.877015 5.799534 0.000000e+00 0.000000e+00 4.5 1
5 AT4G31970 7.704916 11.159530 0.000000e+00 0.000000e+00 4.5 1
6 AT4G33710 10.709176 7.716482 0.000000e+00 0.000000e+00 4.5 1
7 AT5G46890 6.542092 -9.060491 0.000000e+00 0.000000e+00 4.5 1
8 AT5G46900 6.312931 -9.442531 0.000000e+00 0.000000e+00 4.5 1
9 AT5G20230 8.858676 5.952530 4.298371e-322 1.114805e-318 9.0 1
upreg
1 1
2 1
3 1
4 1
5 1
6 1
7 0
8 0
9 1
[ reached 'max' / getOption("max.print") -- omitted 9511 rows ]
partitions <- get.venn.partitions(gene_list, keep.elements = T)
partitions$shared <- rowSums(partitions[1:4])
partitions <- partitions[order(-partitions$shared), ]
# common_genes <- unlist(partitions[1, '..values..']) results <- getBM( filters =
# 'ensembl_gene_id', attributes = c('ensembl_gene_id', 'description',
# 'external_gene_name', 'entrezgene_id'), values = common_genes, mart = mart)
# results <- results[!rownames(results) %in%
# which(duplicated(results$ensembl_gene_id)), ] kable(results)
sharedBy3 <- unique(unlist(subset(partitions, partitions$shared == 3)$..values..))
a <- OntologyProfile(sharedBy3, specie = specie)d <- data.frame(matrix(ncol = 4, nrow = 2))
colnames(d) <- names(g)
row.names(d) <- c("up", "down")
for (comp in names(g)) {
d["up", comp] <- sum(g[[comp]]$upreg == 1)
d["down", comp] <- sum(g[[comp]]$upreg == 0)
}
res <- melt(d)
res$reg = rep(c("up", "down"), 4)
genesIronAt <- res
save(genesIronAt, file = paste0("genesIron", specie, ".RData"))
ggplot(res, aes(fill = reg, y = value, x = variable)) + geom_bar(position = "stack",
stat = "identity", alpha = 0.5, color = "black") + ggtitle("Iron effet on gene regulation") +
xlab("") + ylab("Number of differentially expressed genes") + coord_flip()